### Libraries ------------------------------------------------------------------
library(dplyr) # for pipelines
library(cobalt) # for the balance test
library(tibble) # to convert rownames to a variable
library(stargazer)

##### Data import --------------------------------------------------------------
# Load the data set from CSV. read_csv() belongs to readr, which is not
# attached in the Libraries section, so the call is namespaced explicitly to
# keep the script runnable in a fresh session.
difdef <- readr::read_csv("data/difdef.csv")

##### Balance checks -----------------------------------------------------------
# Actor labels in the order of their numeric treatment codes (1 to 9).
actor_levels <- c(
  "judge", "civil servant", "military official", "out-party", "mayor",
  "journalist", "NGO", "academic", "in-party"
)

# Potential confounders. Defined once so that the column selection and the
# balance formula below cannot drift apart.
covariates <- c(
  "inparty", "leri", "dem_satis", "dem_sup", "pol_interest", "pol_news",
  "pol_trust_inparty", "pol_trust_govt", "pol_trust_outparty",
  "pol_trust_judges", "pol_trust_journ", "pol_trust_part",
  "pol_trust_mayor", "pol_trust_parl", "pol_trust_acad", "pol_trust_ngo",
  "pol_trust_mili", "Age", "Sex", "ethnicity", "country_birth",
  "country_residence", "Nationality", "Language", "student", "employment"
)

# Treatment indicator plus covariates. `treat` is the position of `actor` in
# `actor_levels`; any actor value not listed there becomes NA.
# NOTE(review): `treat` is a numeric coding of a nominal variable, and the
# code further down reads the Corr.Un column, i.e. treatment-covariate
# correlations. Confirm that treating the actor code as a numeric treatment
# is the intended balance check.
covs <- difdef |>
  mutate(treat = as.numeric(match(actor, actor_levels))) |>
  dplyr::select(treat, all_of(covariates))

# Balance table of `treat` against every covariate. The 0.05 threshold is
# attached to the correlation statistic by name, so it no longer depends on
# how an unnamed threshold happens to be assigned to a statistic.
balance <- bal.tab(
  x = reformulate(covariates, response = "treat"),
  data = covs,
  thresholds = c(correlations = 0.05)
)
                              
# Twenty covariates with the largest absolute unadjusted correlation with
# the treatment, in descending order.
# Ranking is done on the unrounded correlations and the cut is made by row
# position, so ties created by rounding can no longer push the table past
# 20 rows. Rows without a correlation are dropped before ranking.
balance2 <- balance$Balance |>
  rownames_to_column(var = "variable") |>
  dplyr::select(variable, correlation = Corr.Un, balance = R.Threshold.Un) |>
  filter(!is.na(correlation)) |>
  arrange(desc(abs(correlation))) |>
  slice_head(n = 20) |>
  mutate(correlation = round(correlation, 3)) # round for display only

# Export the ranked balance table as a LaTeX file.
# summary = FALSE makes stargazer print the data frame itself instead of
# summary statistics of its columns.
stargazer(
  balance2,
  type = "latex",
  summary = FALSE,
  label = "tab:balance",
  title = "20 most unbalanced covariates",
  out = "tables/balance.tex"
)

# Rows of the full data set whose Language is "Dutch".
# NOTE(review): this object is not used anywhere else in the visible script.
unbalanced_language <- dplyr::filter(difdef, Language == "Dutch")

# /./ End of code /./ #